Combination of results from different analysis for metabolic changes in IBD patients

Jan Taubenheim

2022-04-26

require(data.table)
require(ggplot2)
require(ComplexUpset)
require(clusterProfiler)
require(psych)
require(lme4)
require(lmerTest)

1 Introduction

The general idea is to find changes in the metabolic network of IBD patients which are associated with a change the disease activity scores. I analysed data sets for the reaction activity scores, the presence/absence of reactions after context specific model reconstruction and FVA analysis using a reaction based LMM approach. This results in sets of reactions, which are associated with the disease activity. Furthermore, I have made set enrichments for the subsystems - again something which can be compared for the different analysis.

2 Results

2.1 Reaction association

First lets look at the reactions which are shared between the different analysis.

sigs.rxnExpr <- fread("./Statistics/results/rxnExpr_LMMsRandomPatient_fullCoefTable.csv")
sigs.rxnExpr <- sigs.rxnExpr[padj < 0.05,]
sigs.PA <- fread("./Statistics/results/PA_LMMsRandomPatient_fullCoefTable.csv")
sigs.PA <- sigs.PA[padj < 0.05,]
sigs.PA[,rxn := rn]
sigs.FVA <- fread("./Statistics/results/FVA_LMMsRandomPatient_fullCoefTable.csv")
sigs.FVA[,padj := p.adjust(`Pr(>|t|)` , method = "BH")]
sigs.FVA <- sigs.FVA[padj < 0.05,]

sigs <- list(rxnExpr = sigs.rxnExpr,
             PA = sigs.PA,
             FVA = sigs.FVA)

# annotations
subsystems <- fread("./Statistics/dat/subsystems.csv")
rxnAnno <- fread("../../resources/recon-store-reactions-1.tsv")

Here is an upset plot for the reactions.

rxns <- unique(c(subsystems[,reaction], unlist(sapply(sigs,function(x) x[,rxn]))))

vennMat <- matrix(0, nrow = length(rxns), ncol = length(sigs), dimnames = list(rxns, names(sigs)))
for(set.nm in names(sigs)){
    set <- sigs[[set.nm]]
    vennMat[set[,rxn],set.nm] <- 1
}

upset(as.data.frame(vennMat), intersect = colnames(vennMat))

There are 29 reactions which are associated in all analysis with HB/Mayo changes. Here is the complete list:

From all the reactions we can make another hypergeometric enrichment - which would be an overall assessment of the analysis performed.

hgt <- enricher(unique(unlist(sapply(sigs, function(x) x[,rxn]))), TERM2GENE = subsystems)
dotplot(hgt, x = "Count", showCategory = nrow(data.frame(hgt)))

2.2 Subsystem association

Similar we can make a summary of the subsystems by comparing the different subsystems which have been enriched in the different analysis and with GSEA and HGT.

gsea.rxnExpr <- readRDS("Statistics/results/rxnExpr_GSEASubsystems.RDS")
gsea.PA <- readRDS("Statistics/results/PA_GSEASubsystems.RDS")
gsea.FVA <- readRDS("Statistics/results/FVA_GSEASubsystems.RDS")

vennMat.subsys <- matrix(0, ncol = 2*length(sigs), nrow = length(unique(subsystems[,subsystem])),
                         dimnames = list(unique(subsystems[,subsystem]),
                                         paste0(sort(rep(c("hgt.","gsea."), length(sigs))), rep(names(sigs),2)))
                         )
for(n in names(sigs)){
    hgt <- enricher(sigs[[n]][,rxn],TERM2GENE = subsystems)
    vennMat.subsys[data.frame(hgt)[,"ID"], paste0("hgt.",n)] <- 1
    gsea <- get(paste0("gsea.",n))
    vennMat.subsys[data.frame(gsea)[,"ID"], paste0("gsea.",n)] <- 1
}

upset(as.data.frame(vennMat.subsys), intersect = colnames(vennMat.subsys))

dt.vennMat.subsys <- data.table(vennMat.subsys, total = rowSums(vennMat.subsys), keep.rownames = TRUE)

DT::datatable(dt.vennMat.subsys[total > 1,])

There is(are) exactly 0 subsystem(s) which is(are) found across all analysis. That is:

tbl.subsys <- rxnAnno[subsystem %in% rownames(vennMat.subsys)[rowSums(vennMat.subsys) == ncol(vennMat.subsys)],]
DT::datatable(tbl.subsys)

2.3 Diversity measures

An interesting finding was, that generally there were more reactions which, if present/active, there was a reduction in disease activity. This can be either an effect that certain reactions are really blocked/reduced in activity if HB/Mayo scores are high or it is a result of a reduced metabolic diversity in higher inflammation states. To test that one can correlate the HB/Mayo score with the diversity measures of the different analysis.

diversity <- fread("Statistics/results/diversityMeasures.csv")
pairs.panels(diversity[,.(Response,HB_Mayo_impu,richness.PA, shannon.rxnExpr, shannon.FVA)],
           method = "spearman",
           stars = TRUE,
           main = "Spearman's rho")

So there are slight correlation, which poses the question, whether this signal remains, if tested in a linear mixed model to correct for PatientID independence.

mod.richness.PA <- lmer(data = diversity,
                        HB_Mayo_impu ~ richness.PA + (1|PatientID))
mod.shannon.rxnExpr <- lmer(data = diversity,
                        HB_Mayo_impu ~ shannon.rxnExpr + (1|PatientID))
mod.shannon.FVA <- lmer(data = diversity,
                        HB_Mayo_impu ~ shannon.FVA + (1|PatientID))


stats <- data.table(rbind(
                          coef(summary(mod.richness.PA)),
                          coef(summary(mod.shannon.rxnExpr)),
                          coef(summary(mod.shannon.FVA))),
                    keep.rownames = TRUE)[rn != "(Intercept)",]
stats[,padj := p.adjust(`Pr(>|t|)`, method = "BH")]
DT::datatable(stats)

There is a decrease of diversity in FVA ranges with the increase in HB/Mayo signal - that is interesting indeed.

3 Combining information from all data

3.1 PCA for clustering

Another idea is to use the different significant reactions and use the data to make a ordination to get a feeling how well we can separate the different samples form each other by HB/Mayo scores.

rxnExpr <- read.csv("../results/RxnExpression/rxnExpr.GL10-L50-GU90.colormore3D.csv", row.names=1)
rxnExpr <- rxnExpr[sigs.rxnExpr[,rxn],]
PA <- read.csv("../results/ModelAnalysis/rxnIdMat.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
PA <- PA[sigs.PA[,rxn],]
# make PA to integer
PA2 <- PA
PA2[,] <- 0
PA2[PA == "True"] <- 1
PA <- PA2
FVA.range <- read.csv("../results/FVA/rangeFVA.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
FVA.range <- FVA.range[sigs.FVA[coef == "range",rxn],]
FVA.center <- read.csv("../results/FVA/centerFVA.GL10-L50-GU90.MatjesAbsorption.colormore3D.csv", row.names=1)
FVA.center <- FVA.center[sigs.FVA[coef == "center",rxn],]


# transform the PA data to jaccard distances, otherwise it is less meaningful
PA.dist <- as.matrix(dist(t(PA), method = "binary"))
mat.all <- t(rbind(rxnExpr, PA.dist, FVA.center, FVA.range))

pca <- prcomp(mat.all, scale = TRUE, center = TRUE)
pca.x <- merge(data.table(pca[["x"]], keep.rownames = TRUE), diversity, by.x = "rn", by.y = "SeqID")
pca.var <- pca[["sdev"]]^2/sum(pca[["sdev"]]^2)*100

pca.plot <- ggplot(pca.x, aes(x=PC1,y=PC2)) +
    scale_color_gradient(low = "blue", high ="red") +
    geom_point(aes(color = HB_Mayo_impu, shape = Diagnosis),size = 3) +
    theme_bw() +
    facet_grid(~Diagnosis) +
    labs(y = paste0("PC2 (", round(pca.var[2],2),"%)"),
         x = paste0("PC1 (", round(pca.var[1],2),"%)"),
         color = "HB/Mayo score")
pca.plot

pca.plot <- ggplot(pca.x, aes(x=PC1,y=PC2)) +
    scale_color_gradient(low = "blue", high ="red") +
    geom_point(aes(color = log10(Time_seq+1), shape =Remission),size = 3) +
    theme_bw() +
    facet_grid(~Diagnosis) +
    labs(y = paste0("PC2 (", round(pca.var[2],2),"%)"),
         x = paste0("PC1 (", round(pca.var[1],2),"%)"))
pca.plot

# only the rxns common in all 
rxns <- rownames(vennMat)[rowSums(vennMat) == ncol(vennMat)]
PA.common.dist <- as.matrix(dist(t(PA[rxns,])))
FVA.common.range  <- FVA.range[rownames(FVA.range) %in% rxns,]
FVA.common.center <- FVA.center[rownames(FVA.center) %in% rxns,]
rxnExpr.common <- rxnExpr[rxns,]
mat.common.all <- t(rbind(rxnExpr.common, PA.common.dist, FVA.common.range))
pca.common <- prcomp(mat.common.all, scale = TRUE, center = TRUE)
#
pca.common.x <- merge(data.table(pca.common[["x"]], keep.rownames = TRUE), diversity, by.x = "rn", by.y = "SeqID")
pca.common.var <- pca.common[["sdev"]]^2/sum(pca.common[["sdev"]]^2)*100
#
pca.common.plot <- ggplot(pca.common.x, aes(x=PC1,y=PC2)) +
    scale_color_gradient(low = "blue", high ="red") +
    geom_point(aes(color = HB_Mayo_impu, shape = Diagnosis),size = 3) +
    theme_bw() +
    facet_grid(~Diagnosis) +
    labs(y = paste0("PC2 (", round(pca.common.var[2],2),"%)"),
         x = paste0("PC1 (", round(pca.common.var[1],2),"%)"),
         color = "HB/Mayo score")
pca.common.plot

4 Finding correlated exchange reactions

In order to detect those metabolites which probably show an influence on the metabolic network and the significant reactions I correlated the values of the significant reactions for presence/absence and FVA center and range to the values of the values to the exchange reactions. I took the mean of the correlation coefficient for each rxn/Ex_rxn pair and kept the top 1% values for each rxn. I further calculated the mean of all coefficients for the significant rxns in the LMMs and multiplied the two values. This left me with a value which describes the strength of the correlation for a rxns to the exchange of a metabolite and the strength of the rxn change to the HB/Mayo score. It thus gives us values to rank the different metabolites to the change in HB/Mayo.

NOTE: I reversed the correlation coefficients and the LMM coefficients for the results of the FVA center analysis in order to comply with a more readily interpretable result. This means more uptake of the metabolite will increase the be positively associated with an increase in the center of the rxn in question.

all.Excors <- fread("Statistics/results/ExCorrelation_summary2.csv")
topcor <- all.Excors[est_src == "HBMayo"]



# filter the exchange reactions, which show a significant effect which is different from 0
topcor.ttests <- topcor[est_src == "HBMayo", .(p.value = tryCatch({
                                                    t.test(score_abs_mean)[["p.value"]]
                                                }, error = function(e){1.0}),
                                                score_mean = mean(score_mean),
                                                score_sum = sum(score_mean),
                                                score_abs_mean = mean(score_abs_mean),
                                                score_absabs_mean = mean(score_absabs_mean), # model estimates and correlations are transformed to positive values
                                                score_abs_sum = sum(score_abs_mean),
                                                score_absabs_sum = sum(score_absabs_mean),
                                                NoRxns = .N,
                                                Metabolite = unique(Metabolite),
                                                r_direction_mean = mean(r_direction),
                                                compartment = unique(compartment),
                                                main.direction = unique(main.direction)
                                                ), by = .(rn)] 
topcor.ttests[,padj := p.adjust(p.value, method = "BH")]

#topcor.ttests[,rxn_base := gsub("\\[.\\]","",rn)]

#rxnAnno[,rxn_base := gsub("\\[.\\]","",abbreviation)]
#topcor.ttests <- merge(topcor.ttests, rxnAnno[,.(rxn_base, description)], by = "rxn_base")
#topcor.ttests[,description := gsub(".* of","", description)]
#topcor.ttests[,Metabolite := factor(description, levels = unique(description[order(HBMayo_score_sum]))]
#topcor.ttests[, compartment := "Colon"]
#topcor.ttests[grep(".*\\[e\\]",rn), compartment := "Blood"]


# get those which have the largest deviation from 0 (based on the ttest p.value)
top100 <- unique(topcor.ttests[order(padj),rn])[1:25]#[padj < 0.05, rn][]
#setkey(topcor.ttests, "rn")


topcor[,Metabolite := factor(Metabolite, levels = unique(Metabolite)[order(topcor[,.(val = mean(score_abs_mean)), by = .(rn)][,val])])]

plt.excor <- ggplot(topcor[est_src == "HBMayo" & rn %in% top100 ,], aes(x = Metabolite, y = score_abs_mean, fill = main.direction)) +
    geom_boxplot() +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1 , vjust =1))+
    facet_grid(~compartment, scale = "free_x")
plt.excor

DT::datatable(topcor.ttests)
# another way of plotting would be summing the scores by exchange reaction
topcor.ttests[,Metabolite := factor(Metabolite,levels = unique(Metabolite[order(score_absabs_sum)]))]
topcor.sums.plt <- ggplot(topcor.ttests[order(abs(score_absabs_sum),decreasing = T)[1:25],], 
                          aes(x = score_absabs_sum,
                              y = Metabolite,
                              size = NoRxns,
                              #color = main.direction,
                              shape = compartment
                              )) +
    geom_point() +
    geom_vline(linetype = 2, color = "red", xintercept = 0) +
    labs(x = "Summed importance score", title = "HB/Mayo associated metabolites") +
    theme_bw() 
topcor.sums.plt

topcor.ttests[,Metabolite := factor(Metabolite,levels = unique(Metabolite[order(score_absabs_mean)]))]
topcor.sums.plt <- ggplot(topcor.ttests[order(abs(score_absabs_sum),decreasing = T)[1:25],], 
                          aes(x = score_absabs_mean,
                              y = Metabolite,
                              size = NoRxns,
                              #color = main.direction,
                              shape = compartment
                              )) +
    geom_point() +
    geom_vline(linetype = 2, color = "red", xintercept = 0) +
    labs(x = "Mean importance score", title = "HB/Mayo associated metabolites") +
    theme_bw() 
topcor.sums.plt

# relating these exchange reactions back to the subsystems

topcor.subsystems <- topcor[rn %in% topcor.ttests[padj < 0.05, rn] ,
                            .(score_mean = mean(score_mean,na.rm =TRUE),
                              score_sum = sum(score_mean, na.rm = TRUE),
                              N_rxns = .N,
                              compartment = unique(compartment),
                              Metabolite = unique(Metabolite)
                              ),
                            by=.(rn,subsystem, main.direction)]


ggplot(topcor.subsystems, aes(x=Metabolite, y = subsystem, size = N_rxns,color = score_sum)) +
    geom_point(aes(shape = main.direction)) +
    scale_color_gradient2(high = "red",low = "blue",mid = "light grey",midpoint = 0)+
    theme_bw()+
    ggtitle("Metabolites with no. of targets > 1") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1 , vjust =1))+
    facet_grid(~compartment, scale = "free_x")

#topcor.subsystems <- topcor.merge[rn %in% topcor.ttests[NoRxns == 1, rn] & !is.na(HBMayo_score),
#                            .(HBMayo_score_mean = mean(HBMayo_score,na.rm =TRUE),
#                              HBMayo_score_sum = sum(HBMayo_score, na.rm = TRUE),
#                              N_rxns = .N),
#                            by=.(rn,subsystem,main.direction)]
#topcor.subsystems[, HBMayo_score_direction := (as.integer(HBMayo_score_sum>0)*2)-1]
#topcor.subsystems[, compartment := "Colon"]
#topcor.subsystems[grep(".*\\[e\\]",rn), compartment := "Blood"]
#
#ggplot(topcor.subsystems[HBMayo_score_direction <0,], aes(x=Metabolite, y = subsystem)) +
#    scale_shape_manual(values=c(25,24))+
#    geom_point(aes(shape = as.factor(HBMayo_score_direction), fill = main.direction, color = main.direction)) +
##    scale_fill_gradient2(high = "red",low = "blue",mid = "light grey",midpoint = 0)+
#    theme_bw()+
#   # guides(shape = "none")+
#    ggtitle("Metabolites with exactly 1 target") +
#    theme(axis.text.x = element_text(angle = 45, hjust = 1 , vjust =1, size = 10))+
#    facet_grid(~compartment, scale = "free_x")
#
#ggplot(topcor.subsystems[HBMayo_score_direction >0,], aes(x=Metabolite, y = subsystem)) +
#    scale_shape_manual(values=c(24))+
#    geom_point(aes(shape = as.factor(HBMayo_score_direction), fill = main.direction, color = main.direction)) +
##    scale_fill_gradient2(high = "red",low = "blue",mid = "light grey",midpoint = 0)+
#    theme_bw()+
#   # guides(shape = "none")+
#    ggtitle("Metabolites with exactly 1 target ") +
#    theme(axis.text.x = element_text(angle = 45, hjust = 1 , vjust =1, size = 8))+
#    facet_grid(~compartment, scale = "free_x")